{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Direction</th>\n",
       "      <th>District</th>\n",
       "      <th>Elevator</th>\n",
       "      <th>Floor</th>\n",
       "      <th>Garden</th>\n",
       "      <th>Id</th>\n",
       "      <th>Layout</th>\n",
       "      <th>Price</th>\n",
       "      <th>Region</th>\n",
       "      <th>Renovation</th>\n",
       "      <th>Size</th>\n",
       "      <th>Year</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>东西</td>\n",
       "      <td>灯市口</td>\n",
       "      <td>未知</td>\n",
       "      <td>6</td>\n",
       "      <td>锡拉胡同21号院</td>\n",
       "      <td>101102647043</td>\n",
       "      <td>3室1厅</td>\n",
       "      <td>780.0</td>\n",
       "      <td>东城</td>\n",
       "      <td>精装</td>\n",
       "      <td>75.0</td>\n",
       "      <td>1988</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>南北</td>\n",
       "      <td>东单</td>\n",
       "      <td>无电梯</td>\n",
       "      <td>6</td>\n",
       "      <td>东华门大街</td>\n",
       "      <td>101102650978</td>\n",
       "      <td>2室1厅</td>\n",
       "      <td>705.0</td>\n",
       "      <td>东城</td>\n",
       "      <td>精装</td>\n",
       "      <td>60.0</td>\n",
       "      <td>1988</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>南西</td>\n",
       "      <td>崇文门</td>\n",
       "      <td>有电梯</td>\n",
       "      <td>16</td>\n",
       "      <td>新世界中心</td>\n",
       "      <td>101102672743</td>\n",
       "      <td>3室1厅</td>\n",
       "      <td>1400.0</td>\n",
       "      <td>东城</td>\n",
       "      <td>其他</td>\n",
       "      <td>210.0</td>\n",
       "      <td>1996</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>南</td>\n",
       "      <td>崇文门</td>\n",
       "      <td>未知</td>\n",
       "      <td>7</td>\n",
       "      <td>兴隆都市馨园</td>\n",
       "      <td>101102577410</td>\n",
       "      <td>1室1厅</td>\n",
       "      <td>420.0</td>\n",
       "      <td>东城</td>\n",
       "      <td>精装</td>\n",
       "      <td>39.0</td>\n",
       "      <td>2004</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>南</td>\n",
       "      <td>陶然亭</td>\n",
       "      <td>有电梯</td>\n",
       "      <td>19</td>\n",
       "      <td>中海紫御公馆</td>\n",
       "      <td>101102574696</td>\n",
       "      <td>2室2厅</td>\n",
       "      <td>998.0</td>\n",
       "      <td>东城</td>\n",
       "      <td>精装</td>\n",
       "      <td>90.0</td>\n",
       "      <td>2010</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  Direction District Elevator  Floor    Garden            Id Layout   Price  \\\n",
       "0        东西      灯市口       未知      6  锡拉胡同21号院  101102647043   3室1厅   780.0   \n",
       "1        南北       东单      无电梯      6     东华门大街  101102650978   2室1厅   705.0   \n",
       "2        南西      崇文门      有电梯     16     新世界中心  101102672743   3室1厅  1400.0   \n",
       "3         南      崇文门       未知      7    兴隆都市馨园  101102577410   1室1厅   420.0   \n",
       "4         南      陶然亭      有电梯     19    中海紫御公馆  101102574696   2室2厅   998.0   \n",
       "\n",
       "  Region Renovation   Size  Year  \n",
       "0     东城         精装   75.0  1988  \n",
       "1     东城         精装   60.0  1988  \n",
       "2     东城         其他  210.0  1996  \n",
       "3     东城         精装   39.0  2004  \n",
       "4     东城         精装   90.0  2010  "
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "f = open('清洗后的数据.csv')\n",
    "file = pd.read_csv(f, encoding = 'utf-8')\n",
    "file.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Direction</th>\n",
       "      <th>District</th>\n",
       "      <th>Elevator</th>\n",
       "      <th>Floor</th>\n",
       "      <th>Garden</th>\n",
       "      <th>Id</th>\n",
       "      <th>Layout</th>\n",
       "      <th>Price</th>\n",
       "      <th>Region</th>\n",
       "      <th>Renovation</th>\n",
       "      <th>Size</th>\n",
       "      <th>Year</th>\n",
       "      <th>age</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>东西</td>\n",
       "      <td>灯市口</td>\n",
       "      <td>未知</td>\n",
       "      <td>6</td>\n",
       "      <td>锡拉胡同21号院</td>\n",
       "      <td>101102647043</td>\n",
       "      <td>3室1厅</td>\n",
       "      <td>780.0</td>\n",
       "      <td>东城</td>\n",
       "      <td>精装</td>\n",
       "      <td>75.0</td>\n",
       "      <td>1988</td>\n",
       "      <td>33</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>南北</td>\n",
       "      <td>东单</td>\n",
       "      <td>无电梯</td>\n",
       "      <td>6</td>\n",
       "      <td>东华门大街</td>\n",
       "      <td>101102650978</td>\n",
       "      <td>2室1厅</td>\n",
       "      <td>705.0</td>\n",
       "      <td>东城</td>\n",
       "      <td>精装</td>\n",
       "      <td>60.0</td>\n",
       "      <td>1988</td>\n",
       "      <td>33</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>南西</td>\n",
       "      <td>崇文门</td>\n",
       "      <td>有电梯</td>\n",
       "      <td>16</td>\n",
       "      <td>新世界中心</td>\n",
       "      <td>101102672743</td>\n",
       "      <td>3室1厅</td>\n",
       "      <td>1400.0</td>\n",
       "      <td>东城</td>\n",
       "      <td>其他</td>\n",
       "      <td>210.0</td>\n",
       "      <td>1996</td>\n",
       "      <td>25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>南</td>\n",
       "      <td>崇文门</td>\n",
       "      <td>未知</td>\n",
       "      <td>7</td>\n",
       "      <td>兴隆都市馨园</td>\n",
       "      <td>101102577410</td>\n",
       "      <td>1室1厅</td>\n",
       "      <td>420.0</td>\n",
       "      <td>东城</td>\n",
       "      <td>精装</td>\n",
       "      <td>39.0</td>\n",
       "      <td>2004</td>\n",
       "      <td>17</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>南</td>\n",
       "      <td>陶然亭</td>\n",
       "      <td>有电梯</td>\n",
       "      <td>19</td>\n",
       "      <td>中海紫御公馆</td>\n",
       "      <td>101102574696</td>\n",
       "      <td>2室2厅</td>\n",
       "      <td>998.0</td>\n",
       "      <td>东城</td>\n",
       "      <td>精装</td>\n",
       "      <td>90.0</td>\n",
       "      <td>2010</td>\n",
       "      <td>11</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  Direction District Elevator  Floor    Garden            Id Layout   Price  \\\n",
       "0        东西      灯市口       未知      6  锡拉胡同21号院  101102647043   3室1厅   780.0   \n",
       "1        南北       东单      无电梯      6     东华门大街  101102650978   2室1厅   705.0   \n",
       "2        南西      崇文门      有电梯     16     新世界中心  101102672743   3室1厅  1400.0   \n",
       "3         南      崇文门       未知      7    兴隆都市馨园  101102577410   1室1厅   420.0   \n",
       "4         南      陶然亭      有电梯     19    中海紫御公馆  101102574696   2室2厅   998.0   \n",
       "\n",
       "  Region Renovation   Size  Year  age  \n",
       "0     东城         精装   75.0  1988   33  \n",
       "1     东城         精装   60.0  1988   33  \n",
       "2     东城         其他  210.0  1996   25  \n",
       "3     东城         精装   39.0  2004   17  \n",
       "4     东城         精装   90.0  2010   11  "
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 构建衍生变量\n",
    "file['age'] = 2021 - file['Year']\n",
    "file.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "22150"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(file)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "232"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(file['District'].unique())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "4405"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(file['Garden'].unique())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "district和garden两个变量的种类过多，不建议进行入模分析，剔除"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Direction</th>\n",
       "      <th>Elevator</th>\n",
       "      <th>Floor</th>\n",
       "      <th>Layout</th>\n",
       "      <th>Region</th>\n",
       "      <th>Renovation</th>\n",
       "      <th>Size</th>\n",
       "      <th>age</th>\n",
       "      <th>Price</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>东西</td>\n",
       "      <td>未知</td>\n",
       "      <td>6</td>\n",
       "      <td>3室1厅</td>\n",
       "      <td>东城</td>\n",
       "      <td>精装</td>\n",
       "      <td>75.0</td>\n",
       "      <td>33</td>\n",
       "      <td>780.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>南北</td>\n",
       "      <td>无电梯</td>\n",
       "      <td>6</td>\n",
       "      <td>2室1厅</td>\n",
       "      <td>东城</td>\n",
       "      <td>精装</td>\n",
       "      <td>60.0</td>\n",
       "      <td>33</td>\n",
       "      <td>705.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>南西</td>\n",
       "      <td>有电梯</td>\n",
       "      <td>16</td>\n",
       "      <td>3室1厅</td>\n",
       "      <td>东城</td>\n",
       "      <td>其他</td>\n",
       "      <td>210.0</td>\n",
       "      <td>25</td>\n",
       "      <td>1400.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>南</td>\n",
       "      <td>未知</td>\n",
       "      <td>7</td>\n",
       "      <td>1室1厅</td>\n",
       "      <td>东城</td>\n",
       "      <td>精装</td>\n",
       "      <td>39.0</td>\n",
       "      <td>17</td>\n",
       "      <td>420.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>南</td>\n",
       "      <td>有电梯</td>\n",
       "      <td>19</td>\n",
       "      <td>2室2厅</td>\n",
       "      <td>东城</td>\n",
       "      <td>精装</td>\n",
       "      <td>90.0</td>\n",
       "      <td>11</td>\n",
       "      <td>998.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  Direction Elevator  Floor Layout Region Renovation   Size  age   Price\n",
       "0        东西       未知      6   3室1厅     东城         精装   75.0   33   780.0\n",
       "1        南北      无电梯      6   2室1厅     东城         精装   60.0   33   705.0\n",
       "2        南西      有电梯     16   3室1厅     东城         其他  210.0   25  1400.0\n",
       "3         南       未知      7   1室1厅     东城         精装   39.0   17   420.0\n",
       "4         南      有电梯     19   2室2厅     东城         精装   90.0   11   998.0"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "col_list = ['Direction', 'Elevator', 'Floor', 'Layout','Region', 'Renovation', 'Size', 'age', 'Price']\n",
    "file_a = file[col_list]\n",
    "file_a.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Floor</th>\n",
       "      <th>Size</th>\n",
       "      <th>age</th>\n",
       "      <th>Price</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Floor</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.005616</td>\n",
       "      <td>-0.367867</td>\n",
       "      <td>0.119933</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Size</th>\n",
       "      <td>0.005616</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>-0.285581</td>\n",
       "      <td>0.670762</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>age</th>\n",
       "      <td>-0.367867</td>\n",
       "      <td>-0.285581</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>-0.068871</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Price</th>\n",
       "      <td>0.119933</td>\n",
       "      <td>0.670762</td>\n",
       "      <td>-0.068871</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          Floor      Size       age     Price\n",
       "Floor  1.000000  0.005616 -0.367867  0.119933\n",
       "Size   0.005616  1.000000 -0.285581  0.670762\n",
       "age   -0.367867 -0.285581  1.000000 -0.068871\n",
       "Price  0.119933  0.670762 -0.068871  1.000000"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "file_a.corr()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "上面的矩阵为连续性变量之间的相关性研究，可以看到，三个连续性变量之间并没有存在强相关性（以0.75为阈值），故可以纳入下一步中进行离散型、连续性之间的相关系数研究"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "使用sklearn中的preprocessing对离散型数据进行数据化处理，再进行相关系数分析"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn import preprocessing\n",
    "dis_col = ['Direction', 'Elevator', 'Layout', 'Region', 'Renovation']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "D:\\conda\\lib\\site-packages\\ipykernel_launcher.py:3: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
      "  This is separate from the ipykernel package so we can avoid doing imports until\n"
     ]
    }
   ],
   "source": [
    "for x in dis_col:\n",
    "    label = preprocessing.LabelEncoder()\n",
    "    file_a[x] = label.fit_transform(file_a[x])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Direction</th>\n",
       "      <th>Elevator</th>\n",
       "      <th>Floor</th>\n",
       "      <th>Layout</th>\n",
       "      <th>Region</th>\n",
       "      <th>Renovation</th>\n",
       "      <th>Size</th>\n",
       "      <th>age</th>\n",
       "      <th>Price</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Direction</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>-0.042997</td>\n",
       "      <td>-0.132053</td>\n",
       "      <td>0.158905</td>\n",
       "      <td>0.032417</td>\n",
       "      <td>0.020585</td>\n",
       "      <td>0.136595</td>\n",
       "      <td>-0.026882</td>\n",
       "      <td>0.034376</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Elevator</th>\n",
       "      <td>-0.042997</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.269168</td>\n",
       "      <td>-0.019047</td>\n",
       "      <td>0.003251</td>\n",
       "      <td>0.054124</td>\n",
       "      <td>0.046044</td>\n",
       "      <td>-0.226384</td>\n",
       "      <td>0.053855</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Floor</th>\n",
       "      <td>-0.132053</td>\n",
       "      <td>0.269168</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>-0.152220</td>\n",
       "      <td>-0.082165</td>\n",
       "      <td>0.097405</td>\n",
       "      <td>0.005616</td>\n",
       "      <td>-0.367867</td>\n",
       "      <td>0.119933</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Layout</th>\n",
       "      <td>0.158905</td>\n",
       "      <td>-0.019047</td>\n",
       "      <td>-0.152220</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.050219</td>\n",
       "      <td>0.040789</td>\n",
       "      <td>0.733593</td>\n",
       "      <td>-0.102686</td>\n",
       "      <td>0.459345</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Region</th>\n",
       "      <td>0.032417</td>\n",
       "      <td>0.003251</td>\n",
       "      <td>-0.082165</td>\n",
       "      <td>0.050219</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.036311</td>\n",
       "      <td>0.040310</td>\n",
       "      <td>0.029547</td>\n",
       "      <td>0.037933</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Renovation</th>\n",
       "      <td>0.020585</td>\n",
       "      <td>0.054124</td>\n",
       "      <td>0.097405</td>\n",
       "      <td>0.040789</td>\n",
       "      <td>0.036311</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.077743</td>\n",
       "      <td>-0.133570</td>\n",
       "      <td>0.087387</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Size</th>\n",
       "      <td>0.136595</td>\n",
       "      <td>0.046044</td>\n",
       "      <td>0.005616</td>\n",
       "      <td>0.733593</td>\n",
       "      <td>0.040310</td>\n",
       "      <td>0.077743</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>-0.285581</td>\n",
       "      <td>0.670762</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>age</th>\n",
       "      <td>-0.026882</td>\n",
       "      <td>-0.226384</td>\n",
       "      <td>-0.367867</td>\n",
       "      <td>-0.102686</td>\n",
       "      <td>0.029547</td>\n",
       "      <td>-0.133570</td>\n",
       "      <td>-0.285581</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>-0.068871</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Price</th>\n",
       "      <td>0.034376</td>\n",
       "      <td>0.053855</td>\n",
       "      <td>0.119933</td>\n",
       "      <td>0.459345</td>\n",
       "      <td>0.037933</td>\n",
       "      <td>0.087387</td>\n",
       "      <td>0.670762</td>\n",
       "      <td>-0.068871</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            Direction  Elevator     Floor    Layout    Region  Renovation  \\\n",
       "Direction    1.000000 -0.042997 -0.132053  0.158905  0.032417    0.020585   \n",
       "Elevator    -0.042997  1.000000  0.269168 -0.019047  0.003251    0.054124   \n",
       "Floor       -0.132053  0.269168  1.000000 -0.152220 -0.082165    0.097405   \n",
       "Layout       0.158905 -0.019047 -0.152220  1.000000  0.050219    0.040789   \n",
       "Region       0.032417  0.003251 -0.082165  0.050219  1.000000    0.036311   \n",
       "Renovation   0.020585  0.054124  0.097405  0.040789  0.036311    1.000000   \n",
       "Size         0.136595  0.046044  0.005616  0.733593  0.040310    0.077743   \n",
       "age         -0.026882 -0.226384 -0.367867 -0.102686  0.029547   -0.133570   \n",
       "Price        0.034376  0.053855  0.119933  0.459345  0.037933    0.087387   \n",
       "\n",
       "                Size       age     Price  \n",
       "Direction   0.136595 -0.026882  0.034376  \n",
       "Elevator    0.046044 -0.226384  0.053855  \n",
       "Floor       0.005616 -0.367867  0.119933  \n",
       "Layout      0.733593 -0.102686  0.459345  \n",
       "Region      0.040310  0.029547  0.037933  \n",
       "Renovation  0.077743 -0.133570  0.087387  \n",
       "Size        1.000000 -0.285581  0.670762  \n",
       "age        -0.285581  1.000000 -0.068871  \n",
       "Price       0.670762 -0.068871  1.000000  "
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "file_a.corr()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Direction</th>\n",
       "      <th>Elevator</th>\n",
       "      <th>Floor</th>\n",
       "      <th>Layout</th>\n",
       "      <th>Region</th>\n",
       "      <th>Renovation</th>\n",
       "      <th>Size</th>\n",
       "      <th>age</th>\n",
       "      <th>Price</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>6</td>\n",
       "      <td>16</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>75.0</td>\n",
       "      <td>33</td>\n",
       "      <td>780.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>9</td>\n",
       "      <td>0</td>\n",
       "      <td>6</td>\n",
       "      <td>9</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>60.0</td>\n",
       "      <td>33</td>\n",
       "      <td>705.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>10</td>\n",
       "      <td>1</td>\n",
       "      <td>16</td>\n",
       "      <td>16</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>210.0</td>\n",
       "      <td>25</td>\n",
       "      <td>1400.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>7</td>\n",
       "      <td>2</td>\n",
       "      <td>7</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>39.0</td>\n",
       "      <td>17</td>\n",
       "      <td>420.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>7</td>\n",
       "      <td>1</td>\n",
       "      <td>19</td>\n",
       "      <td>10</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>90.0</td>\n",
       "      <td>11</td>\n",
       "      <td>998.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Direction  Elevator  Floor  Layout  Region  Renovation   Size  age   Price\n",
       "0          3         2      6      16       0           3   75.0   33   780.0\n",
       "1          9         0      6       9       0           3   60.0   33   705.0\n",
       "2         10         1     16      16       0           0  210.0   25  1400.0\n",
       "3          7         2      7       2       0           3   39.0   17   420.0\n",
       "4          7         1     19      10       0           3   90.0   11   998.0"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "file_a.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "可以看到，layout和size的相关系数最大，为0.73，但仍处于阈值0.75之下，其他变量之间线性相关性也不存在高度共线性，因此所有变量均可入模"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "file_a.to_csv('建模数据.csv', encoding='gb18030', index = False)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
